Analysing and Visualising Spatio-temporal Patterns of COVID-19 in DKI Jakarta, Indonesia
packages = c('sf', 'tidyverse','readxl','maptools','dplyr', 'raster','spatstat', 'tmap')
for (p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p,character.only = T)
}
data_name_list <- list(
"Standar Kelurahan Data Corona (31 Maret 2020 Pukul 08.00)",
"Standar Kelurahan Data Corona (30 April 2020 Pukul 09.00)",
"Standar Kelurahan Data Corona (31 MEI 2020 Pukul 09.00)",
"Standar Kelurahan Data Corona (30 Juni 2020 Pukul 09.00)",
"Standar Kelurahan Data Corona (31 Juli 2020 Pukul 09.00)",
"Standar Kelurahan Data Corona (31 Agustus 2020 Pukul 10.00)",
"Standar Kelurahan Data Corona (30 September 2020 Pukul 10.00)",
"Standar Kelurahan Data Corona (31 Oktober 2020 Pukul 10.00)",
"Standar Kelurahan Data Corona (30 November 2020 Pukul 10.00)",
"Standar Kelurahan Data Corona (31 Desember 2020 Pukul 10.00)",
"Standar Kelurahan Data Corona (30 Januari 2021 Pukul 10.00)",
"Standar Kelurahan Data Corona (28 Februari 2021 Pukul 10.00)",
"Standar Kelurahan Data Corona (31 Maret 2021 Pukul 10.00)",
"Standar Kelurahan Data Corona (30 April 2021 Pukul 10.00)",
"Standar Kelurahan Data Corona (31 Mei 2021 Pukul 10.00)",
"Standar Kelurahan Data Corona (30 Juni 2021 Pukul 10.00)",
"Standar Kelurahan Data Corona (31 Juli 2021 Pukul 10.00)")
data_refer_list <- list("03_2020", "04_2020", "05_2020", "06_2020", "07_2020", "08_2020", "09_2020", "10_2020", "11_2020", "12_2020", "01_2021", "02_2021", "03_2021", "04_2021", "05_2021", "06_2021", "07_2021")
for (x in 1:length(data_name_list)) {
path = paste("data/",data_name_list[x],".xlsx", sep = "")
df= read_excel(path, sheet = "data")
# from March 2020 to June 2020 raw data excels have double "ID_KEL"
if (x < 5) {
df$ID_KEL...2 <- NULL
names(df)[names(df) == 'ID_KEL...1'] <- 'ID_KEL'
}
# from July 2020 onward raw data excels have double "Meninggal"
else if (x == 5) {
df$Meninggal...21 <- NULL
names(df)[names(df) == 'Meninggal...26'] <- 'Meninggal'
}
else if (x == 6) {
df$Meninggal...23 <- NULL
names(df)[names(df) == 'Meninggal...28'] <- 'Meninggal'
}
else if (x == 7) {
df$Meninggal...24 <- NULL
names(df)[names(df) == 'Meninggal...29'] <- 'Meninggal'
}
else if (x == 8 | x == 9 | x == 10) {
df$Meninggal...25 <- NULL
names(df)[names(df) == 'Meninggal...30'] <- 'Meninggal'
}
else if (x > 10) {
df$Meninggal...26 <- NULL
names(df)[names(df) == 'Meninggal...31'] <- 'Meninggal'
}
df <- df[,c("ID_KEL", "Nama_provinsi", "nama_kota", "nama_kecamatan", "nama_kelurahan", "POSITIF", "Meninggal")]
df$'month' <- data_refer_list[x]
df <- df[-c(1), ]
df <- df[(df$Nama_provinsi=="DKI JAKARTA"),]
df_name = paste("df_", data_refer_list[x], sep = "")
assign(df_name, df)
}
temp_bind_df <- rbind(df_03_2020,df_04_2020)
temp_bind_df <- rbind(temp_bind_df,df_05_2020)
temp_bind_df <- rbind(temp_bind_df,df_06_2020)
temp_bind_df <- rbind(temp_bind_df,df_07_2020)
temp_bind_df <- rbind(temp_bind_df,df_08_2020)
temp_bind_df <- rbind(temp_bind_df,df_09_2020)
temp_bind_df <- rbind(temp_bind_df,df_10_2020)
temp_bind_df <- rbind(temp_bind_df,df_11_2020)
temp_bind_df <- rbind(temp_bind_df,df_12_2020)
temp_bind_df <- rbind(temp_bind_df,df_01_2021)
temp_bind_df <- rbind(temp_bind_df,df_02_2021)
temp_bind_df <- rbind(temp_bind_df,df_03_2021)
temp_bind_df <- rbind(temp_bind_df,df_04_2021)
temp_bind_df <- rbind(temp_bind_df,df_05_2021)
temp_bind_df <- rbind(temp_bind_df,df_06_2021)
binded_df <- rbind(temp_bind_df,df_07_2021)
DJ = st_read(dsn = "data",
layer = "BATAS_DESA_DESEMBER_2019_DUKCAPIL_DKI_JAKARTA")
Reading layer `BATAS_DESA_DESEMBER_2019_DUKCAPIL_DKI_JAKARTA' from data source `C:\yiling-yu\IS415_Blog\_take_home_exercises\Take-home Exercise 1\data'
using driver `ESRI Shapefile'
Simple feature collection with 269 features and 161 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 106.3831 ymin: -6.370815 xmax: 106.9728 ymax: -5.184322
Geodetic CRS: WGS 84
#EPSG for DGN95 / Indonesia TM-3 zone 54.1: 23845
DJ_sf <- st_transform(DJ, crs = 23845)
st_geometry(DJ_sf)
Geometry set for 269 features
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -3691981 ymin: 663887.8 xmax: -3606237 ymax: 815440.9
Projected CRS: DGN95 / Indonesia TM-3 zone 54.1
First 5 geometries:
have a look at the map to identify outer islands
tmap_mode('view')
tm_shape(DJ_sf) +
tm_polygons()
remove the rows that have column “KAB_KOTA” == “KEPULAUAN SERIBU”
DJ_sf <- DJ_sf[!(DJ_sf$KAB_KOTA == "KEPULAUAN SERIBU"),]
DJ_sf <- DJ_sf %>%
drop_na()
have a look at the map again to confirm islands are excluded
tmap_mode('plot')
tm_shape(DJ_sf) +
tm_polygons()

DJ_sf = DJ_sf[,c(1:9)]
nama_kelurahan_values <- unique(binded_df[c("nama_kelurahan")])
DESA_KELUR_values <- unique(DJ_sf[c("DESA_KELUR")]) %>%
st_set_geometry(NULL)
no_match <- list()
for (x in nama_kelurahan_values$nama_kelurahan) {
if (x %in% DESA_KELUR_values$DESA_KELUR == FALSE){
no_match <- append(no_match,x)
}
}
no_match
[[1]]
[1] "PINANG RANTI"
[[2]]
[1] "BALE KAMBANG"
[[3]]
[1] "PAL MERIAM"
[[4]]
[1] "JATI PULO"
[[5]]
[1] "KALI BARU"
[[6]]
[1] "RAWA JATI"
[[7]]
[1] "KERENDANG"
[[8]]
[1] "KAMPUNG TENGAH"
[[9]]
[1] "KRAMAT JATI"
[[10]]
[1] "HALIM PERDANA KUSUMAH"
[[11]]
[1] "P. HARAPAN"
[[12]]
[1] "P. KELAPA"
[[13]]
[1] "P. PANGGANG"
[[14]]
[1] "P. PARI"
[[15]]
[1] "P. TIDUNG"
[[16]]
[1] "UNTUNG JAWA"
[[17]]
[1] "PULAU HARAPAN"
[[18]]
[1] "PULAU KELAPA"
[[19]]
[1] "PULAU PANGGANG"
[[20]]
[1] "PULAU PARI"
[[21]]
[1] "PULAU TIDUNG"
[[22]]
[1] "PULAU UNTUNG JAWA"
for (x in no_match) {
if (x == "KERENDANG" ){
binded_df$nama_kelurahan[binded_df$nama_kelurahan == x] <- "KRENDANG"
}
else if (x == "KAMPUNG TENGAH" ){
binded_df$nama_kelurahan[binded_df$nama_kelurahan == x] <- "TENGAH"
}
else if (x == "HALIM PERDANA KUSUMAH" ){
binded_df$nama_kelurahan[binded_df$nama_kelurahan == x] <- "HALIM PERDANA KUSUMA"
}
else if (x == "P. HARAPAN" | x == "P. KELAPA" | x == "P. PANGGANG" | x == "P. PARI" | x == "P. TIDUNG" | x == "UNTUNG JAWA" | x == "PULAU HARAPAN" | x == "PULAU KELAPA" | x == "PULAU PANGGANG" | x == "PULAU PARI"| x == "PULAU TIDUNG" | x == "PULAU UNTUNG JAWA"){
binded_df<-binded_df[!(binded_df$nama_kelurahan == x),]
}
else {
binded_df$nama_kelurahan[binded_df$nama_kelurahan == x] <- str_replace_all(string=x, pattern=" ", repl="")
}
}
nama_kelurahan_values <- unique(binded_df[c("nama_kelurahan")])
DESA_KELUR_values <- unique(DJ_sf[c("DESA_KELUR")]) %>%
st_set_geometry(NULL)
no_match <- list()
for (x in nama_kelurahan_values$nama_kelurahan) {
if (x %in% DESA_KELUR_values$DESA_KELUR == FALSE){
no_match <- append(no_match,x)
}
}
no_match
list()
DJ_covid <- left_join(DJ_sf, binded_df,
by = c("DESA_KELUR" = "nama_kelurahan"))
colSums(is.na(DJ_covid))
OBJECT_ID KODE_DESA DESA KODE
0 0 0 0
PROVINSI KAB_KOTA KECAMATAN DESA_KELUR
0 0 0 0
JUMLAH_PEN ID_KEL Nama_provinsi nama_kota
0 0 0 0
nama_kecamatan POSITIF Meninggal month
0 0 0 0
geometry
0
#column names
#POSITIF (cumulative confirmed cases)
#Meninggal (cumulative death cases)
#JUMLAH_PEN (Total Population)
DJ_covid <- DJ_covid %>%
mutate(`POSITIF%` = (`POSITIF`
/`JUMLAH_PEN`)*10000) %>%
mutate(`Meninggal%` = (`Meninggal`
/`JUMLAH_PEN`)*10000)
cleaned_df <- write_rds(DJ_covid, "data/DJ_covid.rds")
#column names
#POSITIF (cumulative confirmed cases)
#Meninggal (cumulative death cases)
#JUMLAH_PEN (Total Population)
#POSITIF% (cumulative confirmed cases rate)
#Meninggal% (cumulative death cases rate)
#month
grouped_df <- cleaned_df %>%
group_by(month) %>%
summarise(`S_POSITIF` = sum(`POSITIF`), `S_Meninggal` = sum(`Meninggal`)) %>%
st_set_geometry(NULL)
grouped_df <- as.data.frame(lapply(grouped_df, unlist))
#arrange the order of month
grouped_df$month <- factor(grouped_df$month, levels = rev(grouped_df$month))
ggplot(data = grouped_df,
aes(x = month,
y = S_POSITIF))+
geom_bar(stat = "identity", color="black", fill="light blue" )+
labs(title = "Histogram of monthly cumulative confirmed cases in DKI JAKARTA",
x = "month-year",
y = "confirmed cases") +
coord_flip()

#arrange the order of month
grouped_df$month <- factor(grouped_df$month, levels = rev(grouped_df$month))
ggplot(data = grouped_df,
aes(x = month,
y = S_Meninggal))+
geom_bar(stat = "identity", color="black", fill="light blue" )+
labs(title = "Histogram of monthly cumulative death cases in DKI JAKARTA",
x = "month-year",
y = "death cases") +
coord_flip()

#classification: quantile
quantile_pop <- tm_shape(cleaned_df)+
tm_polygons("JUMLAH_PEN",
n = 6,
style = "quantile",
palette = "Blues")+
tm_layout(main.title = "Quantile")
#classification: natural breaks
jenks_pop <- tm_shape(cleaned_df)+
tm_polygons("JUMLAH_PEN",
n = 6,
style = "jenks",
palette = "Blues")+
tm_layout(main.title = "Natural breaks")
#classification: equal interval
equal_pop <- tm_shape(cleaned_df)+
tm_polygons("JUMLAH_PEN",
n = 6,
style = "equal",
palette = "Blues")+
tm_layout(main.title = "Equal interval")
tmap_arrange(quantile_pop, jenks_pop, equal_pop, ncol=3, nrow=1)

#get.var function
get.var <- function(vname,df) {
v <- df[vname] %>% st_set_geometry(NULL)
v <- unname(v[,1])
return(v)
}
#boxbreaks function
boxbreaks <- function(v,mult=1.5) {
qv <- unname(quantile(v))
iqr <- qv[4] - qv[2]
upfence <- qv[4] + mult * iqr
lofence <- qv[2] - mult * iqr
# initialize break points vector
bb <- vector(mode="numeric",length=7)
# logic for lower and upper fences
if (lofence < qv[1]) { # no lower outliers
bb[1] <- lofence
bb[2] <- floor(qv[1])
} else {
bb[2] <- lofence
bb[1] <- qv[1]
}
if (upfence > qv[5]) { # no upper outliers
bb[7] <- upfence
bb[6] <- ceiling(qv[5])
} else {
bb[6] <- upfence
bb[7] <- qv[5]
}
bb[3:5] <- qv[2:4]
return(bb)
}
#boxmap function
boxmap <- function(vnam, df,
legtitle=NA,
bb,
mtitle,
mult=1.5){
tm_shape(df) +
tm_fill(vnam,
title=legtitle,
breaks=bb,
palette="Reds",
labels = c("lower outlier",
"< 25%",
"25% - 50%",
"50% - 75%",
"> 75%",
"upper outlier")) +
tm_borders() +
tm_layout(main.title = mtitle,
main.title.size = 4,
main.title.position = "center",
legend.title.size = 2,
legend.text.size = 2,)
}
cleaned_df_03_2020<- cleaned_df %>% filter(month=="03_2020")
cleaned_df_04_2020<- cleaned_df %>% filter(month=="04_2020")
cleaned_df_05_2020<- cleaned_df %>% filter(month=="05_2020")
cleaned_df_06_2020<- cleaned_df %>% filter(month=="06_2020")
cleaned_df_07_2020<- cleaned_df %>% filter(month=="07_2020")
cleaned_df_08_2020<- cleaned_df %>% filter(month=="08_2020")
cleaned_df_09_2020<- cleaned_df %>% filter(month=="09_2020")
cleaned_df_10_2020<- cleaned_df %>% filter(month=="10_2020")
cleaned_df_11_2020<- cleaned_df %>% filter(month=="11_2020")
cleaned_df_12_2020<- cleaned_df %>% filter(month=="12_2020")
cleaned_df_01_2021<- cleaned_df %>% filter(month=="01_2021")
cleaned_df_02_2021<- cleaned_df %>% filter(month=="02_2021")
cleaned_df_03_2021<- cleaned_df %>% filter(month=="03_2021")
cleaned_df_04_2021<- cleaned_df %>% filter(month=="04_2021")
cleaned_df_05_2021<- cleaned_df %>% filter(month=="05_2021")
cleaned_df_06_2021<- cleaned_df %>% filter(month=="06_2021")
cleaned_df_07_2021<- cleaned_df %>% filter(month=="07_2021")
#column names
#POSITIF% (cumulative confirmed cases rate)
#Meninggal% (cumulative death cases rate)
#month
#get boxbreaks for POSITIF%, use accumulated final value in month July 2021
end <- cleaned_df %>%
filter(month=="07_2021")
var_p <- get.var("POSITIF%",end)
bb_p <- boxbreaks(var_p,mult=1.5)
#create raw rate boxmaps for each month
tmap_p_03_2020 <- boxmap("POSITIF%", cleaned_df_03_2020, bb=bb_p, mtitle = "03_2020")
tmap_p_04_2020 <- boxmap("POSITIF%", cleaned_df_04_2020, bb=bb_p, mtitle = "04_2020")
tmap_p_05_2020 <- boxmap("POSITIF%", cleaned_df_05_2020, bb=bb_p, mtitle = "05_2020")
tmap_p_06_2020 <- boxmap("POSITIF%", cleaned_df_06_2020, bb=bb_p, mtitle = "06_2020")
tmap_p_07_2020 <- boxmap("POSITIF%", cleaned_df_07_2020, bb=bb_p, mtitle = "07_2020")
tmap_p_08_2020 <- boxmap("POSITIF%", cleaned_df_08_2020, bb=bb_p, mtitle = "08_2020")
tmap_p_09_2020 <- boxmap("POSITIF%", cleaned_df_09_2020, bb=bb_p, mtitle = "09_2020")
tmap_p_10_2020 <- boxmap("POSITIF%", cleaned_df_10_2020, bb=bb_p, mtitle = "10_2020")
tmap_p_11_2020 <- boxmap("POSITIF%", cleaned_df_11_2020, bb=bb_p, mtitle = "11_2020")
tmap_p_12_2020 <- boxmap("POSITIF%", cleaned_df_12_2020, bb=bb_p, mtitle = "12_2020")
tmap_p_01_2021 <- boxmap("POSITIF%", cleaned_df_01_2021, bb=bb_p, mtitle = "01_2021")
tmap_p_02_2021 <- boxmap("POSITIF%", cleaned_df_02_2021, bb=bb_p, mtitle = "02_2021")
tmap_p_03_2021 <- boxmap("POSITIF%", cleaned_df_03_2021, bb=bb_p, mtitle = "03_2021")
tmap_p_04_2021 <- boxmap("POSITIF%", cleaned_df_04_2021, bb=bb_p, mtitle = "04_2021")
tmap_p_05_2021 <- boxmap("POSITIF%", cleaned_df_05_2021, bb=bb_p, mtitle = "05_2021")
tmap_p_06_2021 <- boxmap("POSITIF%", cleaned_df_06_2021, bb=bb_p, mtitle = "06_2021")
tmap_p_07_2021 <- boxmap("POSITIF%", cleaned_df_07_2021, bb=bb_p, mtitle = "07_2021")
tmap_mode("plot")
tmap_arrange(tmap_p_03_2020,tmap_p_04_2020,tmap_p_05_2020,tmap_p_06_2020,tmap_p_07_2020,tmap_p_08_2020,tmap_p_09_2020,tmap_p_10_2020,tmap_p_11_2020,tmap_p_12_2020,tmap_p_01_2021,tmap_p_02_2021,tmap_p_03_2021,tmap_p_04_2021,tmap_p_05_2021,tmap_p_06_2021,tmap_p_07_2021, ncol=3)

#column names
#POSITIF% (cumulative confirmed cases rate)
#Meninggal% (cumulative death cases rate)
#month
#get boxbreaks for Meninggal%, use accumulated final value in month July 2021
end <- cleaned_df %>%
filter(month=="07_2021")
var_m <- get.var("Meninggal%",end)
bb_m <- boxbreaks(var_m,mult=1.5)
#create raw rate boxmaps for each month
tmap_m_03_2020 <- boxmap("Meninggal%", cleaned_df_03_2020, bb=bb_m, mtitle = "03_2020")
tmap_m_04_2020 <- boxmap("Meninggal%", cleaned_df_04_2020, bb=bb_m, mtitle = "04_2020")
tmap_m_05_2020 <- boxmap("Meninggal%", cleaned_df_05_2020, bb=bb_m, mtitle = "05_2020")
tmap_m_06_2020 <- boxmap("Meninggal%", cleaned_df_06_2020, bb=bb_m, mtitle = "06_2020")
tmap_m_07_2020 <- boxmap("Meninggal%", cleaned_df_07_2020, bb=bb_m, mtitle = "07_2020")
tmap_m_08_2020 <- boxmap("Meninggal%", cleaned_df_08_2020, bb=bb_m, mtitle = "08_2020")
tmap_m_09_2020 <- boxmap("Meninggal%", cleaned_df_09_2020, bb=bb_m, mtitle = "09_2020")
tmap_m_10_2020 <- boxmap("Meninggal%", cleaned_df_10_2020, bb=bb_m, mtitle = "10_2020")
tmap_m_11_2020 <- boxmap("Meninggal%", cleaned_df_11_2020, bb=bb_m, mtitle = "11_2020")
tmap_m_12_2020 <- boxmap("Meninggal%", cleaned_df_12_2020, bb=bb_m, mtitle = "12_2020")
tmap_m_01_2021 <- boxmap("Meninggal%", cleaned_df_01_2021, bb=bb_m, mtitle = "01_2021")
tmap_m_02_2021 <- boxmap("Meninggal%", cleaned_df_02_2021, bb=bb_m, mtitle = "02_2021")
tmap_m_03_2021 <- boxmap("Meninggal%", cleaned_df_03_2021, bb=bb_m, mtitle = "03_2021")
tmap_m_04_2021 <- boxmap("Meninggal%", cleaned_df_04_2021, bb=bb_m, mtitle = "04_2021")
tmap_m_05_2021 <- boxmap("Meninggal%", cleaned_df_05_2021, bb=bb_m, mtitle = "05_2021")
tmap_m_06_2021 <- boxmap("Meninggal%", cleaned_df_06_2021, bb=bb_m, mtitle = "06_2021")
tmap_m_07_2021 <- boxmap("Meninggal%", cleaned_df_07_2021, bb=bb_m, mtitle = "07_2021")
tmap_arrange(tmap_m_03_2020,tmap_m_04_2020,tmap_m_05_2020,tmap_m_06_2020,tmap_m_07_2020,tmap_m_08_2020,tmap_m_09_2020,tmap_m_10_2020,tmap_m_11_2020,tmap_m_12_2020,tmap_m_01_2021,tmap_m_02_2021,tmap_m_03_2021,tmap_m_04_2021,tmap_m_05_2021,tmap_m_06_2021,tmap_m_07_2021, ncol=3)

sum_observed <- sum(cleaned_df_07_2021$POSITIF)
sum_population <- sum(cleaned_df_07_2021$JUMLAH_PEN)
p_i <- sum_observed / sum_population
E_i <- p_i * cleaned_df_07_2021$JUMLAH_PEN
cleaned_df_07_2021$smr_p <- cleaned_df_07_2021$POSITIF / E_i
erm_covid <- tm_shape(cleaned_df_07_2021) +
tm_fill("smr_p",title="Excess risk of getting covid",breaks=c(-100,.25,.5,1,2,4,1000),labels = c("<.25", ".25 - .50", ".50 - 1.00","1.00 - 2.00", "2.00 - 4.00", "> 4.00" ), palette = "-RdBu") +
tm_borders()
sum_observed <- sum(cleaned_df_07_2021$Meninggal)
sum_population <- sum(cleaned_df_07_2021$JUMLAH_PEN)
p_i <- sum_observed / sum_population
E_i <- p_i * cleaned_df_07_2021$JUMLAH_PEN
cleaned_df_07_2021$smr_d <- cleaned_df_07_2021$Meninggal / E_i
erm_death <- tm_shape(cleaned_df_07_2021) +
tm_fill("smr_d",title="Excess risk of death",breaks=c(-100,.25,.5,1,2,4,1000),labels = c("<.25", ".25 - .50", ".50 - 1.00","1.00 - 2.00", "2.00 - 4.00", "> 4.00" ), palette = "-RdBu") +
tm_borders()
tmap_mode("view")
tmap_arrange(erm_covid,erm_death,ncol = 2)